@@ -81,6 +81,7 @@ gem 'string-scrub' # for ruby <2.1 |
||
81 | 81 |
gem 'therubyracer', '~> 0.12.1' |
82 | 82 |
gem 'typhoeus', '~> 0.6.3' |
83 | 83 |
gem 'uglifier', '>= 1.3.0' |
84 |
+gem 'hypdf', '~> 1.0.7' |
|
84 | 85 |
|
85 | 86 |
group :development do |
86 | 87 |
gem 'better_errors', '~> 1.1' |
@@ -174,12 +174,17 @@ GEM |
||
174 | 174 |
hipchat (1.2.0) |
175 | 175 |
httparty |
176 | 176 |
hpricot (0.8.6) |
177 |
+ httmultiparty (0.3.10) |
|
178 |
+ httparty (>= 0.7.3) |
|
179 |
+ multipart-post |
|
177 | 180 |
http (0.5.1) |
178 | 181 |
http_parser.rb |
179 | 182 |
http_parser.rb (0.6.0) |
180 | 183 |
httparty (0.13.1) |
181 | 184 |
json (~> 1.8) |
182 | 185 |
multi_xml (>= 0.5.2) |
186 |
+ hypdf (1.0.7) |
|
187 |
+ httmultiparty (= 0.3.10) |
|
183 | 188 |
i18n (0.6.11) |
184 | 189 |
jquery-rails (3.1.1) |
185 | 190 |
railties (>= 3.0, < 5.0) |
@@ -475,6 +480,7 @@ DEPENDENCIES |
||
475 | 480 |
guard-rspec |
476 | 481 |
hipchat (~> 1.2.0) |
477 | 482 |
httparty (~> 0.13) |
483 |
+ hypdf (~> 1.0.7) |
|
478 | 484 |
jquery-rails (~> 3.1.0) |
479 | 485 |
json (~> 1.8.1) |
480 | 486 |
jsonpath (~> 0.5.6) |
@@ -0,0 +1,62 @@ |
||
1 |
+require 'open-uri' |
|
2 |
+require 'hypdf' |
|
3 |
+ |
|
4 |
module Agents
  # Reacts to incoming events whose payload carries a `url` key: each URL is
  # downloaded and handed to HyPDF's pdfinfo command, and one new event is
  # emitted per URL containing the pdfinfo result merged with the incoming
  # payload.
  #
  # The agent is event-driven only (`cannot_be_scheduled!`) and declares a
  # dependency on the HyPDF constant being defined.
  class PdfInfoAgent < Agent
    # Only absolute http(s) URLs are fetched. `\A` anchors at the very start
    # of the string; `^` would also match at the start of any later line, so a
    # multi-line value whose first line is not a URL could slip through.
    HTTP_URL_PATTERN = %r{\Ahttps?://}i

    gem_dependency_check { defined?(HyPDF) }

    cannot_be_scheduled!

    description <<-MD
      In order for this agent to work, you need to have [HyPDF](https://devcenter.heroku.com/articles/hypdf) running and configured.

      It works by acting on events that contain a key `url` in their payload, and runs the [pdfinfo](https://devcenter.heroku.com/articles/hypdf#pdfinfo) command on them.
    MD

    event_description <<-MD
      This will change based on the metadata in the pdf.

          { "Title"=>"Everyday Rails Testing with RSpec",
            "Author"=>"Aaron Sumner",
            "Creator"=>"LaTeX with hyperref package",
            "Producer"=>"xdvipdfmx (0.7.8)",
            "CreationDate"=>"Fri Aug 2 05",
            "32"=>"50 2013",
            "Tagged"=>"no",
            "Pages"=>"150",
            "Encrypted"=>"no",
            "Page size"=>"612 x 792 pts (letter)",
            "Optimized"=>"no",
            "PDF version"=>"1.5",
            "url": "your url"
          }
    MD

    # The agent is considered healthy as long as it has no recent error logs.
    def working?
      !recent_error_logs?
    end

    # This agent takes no options.
    def default_options
      {}
    end

    # Processes each incoming event by passing its `url` payload value (a
    # single URL or a list of URLs) to #check_url together with the payload.
    #
    # @param incoming_events [Enumerable] events to process; each must respond
    #   to `payload` with a Hash-like object
    def receive(incoming_events)
      incoming_events.each do |event|
        interpolate_with(event) do
          check_url(event.payload['url'], event.payload)
        end
      end
    end

    # Runs pdfinfo on every fetchable URL in +in_url+ and emits one event per
    # URL. Values that are not absolute http(s) URL strings are skipped.
    #
    # @param in_url [String, Array<String>, nil] URL or list of URLs to inspect
    # @param payload [Hash] incoming payload; merged over the pdfinfo result,
    #   so on a key collision the payload's value wins
    # @raise whatever the download or HyPDF.pdfinfo raises; errors are not
    #   rescued here
    def check_url(in_url, payload)
      return unless in_url.present?

      Array(in_url).each do |url|
        next unless fetchable_url?(url)

        log "Fetching #{url}"
        # URI#open (from open-uri) is used instead of Kernel#open because the
        # latter treats a string starting with "|" as a command to spawn. The
        # block form closes the downloaded stream once pdfinfo has returned.
        # NOTE(review): assumes HyPDF.pdfinfo reads the IO synchronously and
        # returns a Hash -- confirm against the hypdf gem.
        info = URI.parse(url).open { |pdf| HyPDF.pdfinfo(pdf) }
        create_event :payload => info.merge(payload)
      end
    end

    private

    # True only for Strings that start with http:// or https://.
    def fetchable_url?(url)
      url.is_a?(String) && !(url =~ HTTP_URL_PATTERN).nil?
    end
  end
end